suppressWarnings(suppressPackageStartupMessages({
library(dplyr)
library(magrittr)
library(data.table)
library(ggplot2)
library(tidyr)
library(knitr)
library(pander)
}))
opts_chunk$set(cache=TRUE, echo=TRUE, message=FALSE, warning=FALSE, dev="pdf")
Read in data
load("results/cellcount_coloc.rdata")
load("results/cellcount_mr.rdata")
mrivw$fdr <- p.adjust(mrivw$pval, "fdr")
wr$fdr <- p.adjust(wr$pval, "fdr")
Significant ivw
mrivw_sig <- subset(mrivw, fdr < 0.05)
nrow(mrivw_sig) / nrow(mrivw)
## [1] 0.01630987
Significant hits more likely when number of instruments is larger
table(mrivw_sig$nsnp) / table(mrivw$nsnp)
##
## 2 3 4 5 6 7 8
## 0.01584892 0.01602472 0.02361833 0.03318803 0.02875790 0.05765199 0.01176471
## 9
## 0.17647059
summary(glm(I(fdr < 0.05) ~ nsnp, mrivw, family="binomial")) %>% pander::pander()
| Â | Estimate | Std. Error | z value | Pr(>|z|) |
|---|---|---|---|---|
| (Intercept) | -4.464 | 0.01933 | -231 | 0 |
| nsnp | 0.157 | 0.007893 | 19.89 | 4.928e-88 |
(Dispersion parameter for binomial family taken to be 1 )
| Null deviance: | 370938 on 2226319 degrees of freedom |
| Residual deviance: | 370572 on 2226318 degrees of freedom |
Significant wr
wr_sig <- subset(wr, fdr < 0.05 & nsnp == 1)
table(wr_sig$method) %>% pander::pander()
| Inverse variance weighted | Wald ratio |
|---|---|
| 0 | 574891 |
table(wr_sig$nsnp) %>% pander::pander()
| 1 |
|---|
| 574891 |
table(wr$nsnp) %>% pander::pander()
| 1 | 2 | 3 |
|---|---|---|
| 9567947 | 5188 | 35 |
Check that the WR and coloc analyses have the same data
res$id.exposure <- paste(res$variant, res$cpg)
rescheck <- subset(res, paste(id.exposure, trait) %in% paste(wr_sig$id.exposure, wr_sig$id.outcome))
dim(rescheck) %>% pander::pander()
574891 and 15
dim(wr_sig) %>% pander::pander()
574891 and 10
Coloc
table(rescheck$nsnps > 10) %>% pander::pander()
| FALSE | TRUE |
|---|---|
| 60628 | 513203 |
table(rescheck$PP.H4.abf > 0.8 & rescheck$nsnps > 10) %>% pander::pander()
| FALSE | TRUE |
|---|---|
| 479093 | 94738 |
Find colocalising hits
wr <- wr %>% tidyr::separate(exposure, sep=" ", into=c("variant", "cpg"), remove=FALSE)
# Get significant wr
wr_sig <- subset(wr, fdr < 0.05)
# Which colocalise?
wr_coloc <- inner_join(
wr_sig,
subset(res, PP.H4.abf > 0.8 & nsnps > 10),
by=c("id.exposure", "outcome"="trait")
)
Which of these cpgs have a secondary mqtl?
cpg_trait <- unique(paste(wr_coloc$cpg.x, wr_coloc$outcome))
wr_cpg_trait <- subset(wr, paste(cpg, outcome) %in% cpg_trait)
wr_cpg_trait$cpgtrait <- paste(wr_cpg_trait$cpg, wr_cpg_trait$outcome)
table(table(wr_cpg_trait$cpgtrait)) %>% pander::pander()
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|
| 42126 | 22067 | 14897 | 6975 | 3385 | 1416 | 354 | 68 | 15 |
Separate the primary and separate coloc signals
wr_cpg_trait_primary <- subset(wr_cpg_trait, paste(id.exposure, outcome) %in% paste(wr_coloc$id.exposure, wr_coloc$id.outcome))
wr_cpg_trait_secondary <- subset(wr_cpg_trait, !paste(id.exposure, outcome) %in% paste(wr_coloc$id.exposure, wr_coloc$id.outcome))
For every cpg-trait pair, meta analyse the results to get a single assoc. Essentially creating the IVW fixed effects estimate from the Wald ratios
cpg_trait_list <- unique(paste(wr_cpg_trait$cpg, wr_cpg_trait$outcome))
metafn <- function(x)
{
m <- meta::metagen(x$b, x$se)
y <- tibble(
nsnp=nrow(x),
b=m$TE.fixed,
se=m$seTE.fixed,
pval=m$pval.fixed
)
y
}
out <- wr_cpg_trait_secondary %>%
group_by(cpgtrait) %>%
do(metafn(.))
Combine primary and secondary results
ps <- inner_join(wr_cpg_trait_primary, out, by="cpgtrait")
ps$fdr.y <- p.adjust(ps$pval.y, "fdr")
ps$fdr.x <- p.adjust(ps$pval.x, "fdr")
save(ps, file="results/primary-secondary.rdata")
Check all primary associations have fdr < 0.05
table(ps$fdr.x < 0.05) %>% pander::pander()
| TRUE |
|---|
| 50782 |
How many secondary associations have fdr < 0.05?
table(ps$fdr.y < 0.05) %>% pander::pander()
| FALSE | TRUE |
|---|---|
| 35409 | 15373 |
We should remove chromosome 6 to be careful about MHC region
table(ps$fdr.y[!grepl("6:", ps$variant)] < 0.05) %>% pander::pander()
| FALSE | TRUE |
|---|---|
| 23238 | 4900 |
Looks like a lot of this is from the MHC region.
Are the primary and secondary effects correlated?
summary(lm(b.y ~ b.x, ps)) %>% pander::pander()
| Â | Estimate | Std. Error | t value | Pr(>|t|) |
|---|---|---|---|---|
| (Intercept) | 0.001699 | 0.0003603 | 4.715 | 2.425e-06 |
| b.x | 0.08542 | 0.001436 | 59.47 | 0 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 50782 | 0.08119 | 0.06512 | 0.0651 |
This effect attenuates sharply after removing MHC
summary(lm(b.y ~ b.x, ps %>% subset(., !grepl("6:", variant)))) %>% pander::pander()
| Â | Estimate | Std. Error | t value | Pr(>|t|) |
|---|---|---|---|---|
| (Intercept) | 0.0006889 | 0.0003654 | 1.885 | 0.05942 |
| b.x | 0.03129 | 0.00202 | 15.49 | 6.276e-54 |
| Observations | Residual Std. Error | \(R^2\) | Adjusted \(R^2\) |
|---|---|---|---|
| 28138 | 0.06129 | 0.008461 | 0.008426 |
Look at the plots
ggplot(ps %>% subset(., !grepl("6:", variant)), aes(x = b.x, y = b.y)) +
geom_point(aes(colour=fdr.y < 0.05)) +
geom_smooth(method="lm") +
theme_bw() +
labs(x="Primary causal estimate", y="Secondary causal estimate", colour="Secondary\nFDR < 0.05")
A lot of significant results secondary effects, but not much of a relationship between primary and secondary hits.
How does it compare by splitting by number of instruments used for secondary association?
ggplot(ps %>% subset(., !grepl("6:", variant)), aes(x = b.x, y = b.y)) +
geom_point(aes(colour=fdr.y < 0.05)) +
geom_smooth(method="lm") +
facet_wrap(~ nsnp.y) +
labs(x="Primary causal estimate", y="Secondary causal estimate", colour="Secondary\nFDR < 0.05")
What are the primary-secondary associations?
assocfn <- function(x)
{
mod <- summary(lm(b.y ~ b.x, x))$coefficients
data.frame(nassoc=nrow(x), b=mod[2,1], se=mod[2,2], p=mod[2,4])
}
ps %>%
subset(., !grepl("6:", variant)) %>%
group_by(nsnp=nsnp.y) %>%
do(assocfn(.)) %>%
pander()
| nsnp | nassoc | b | se | p |
|---|---|---|---|---|
| 1 | 15357 | 0.04754 | 0.003452 | 6.678e-43 |
| 2 | 8519 | 0.01797 | 0.00262 | 7.324e-12 |
| 3 | 3066 | 0.01787 | 0.003329 | 8.596e-08 |
| 4 | 917 | -0.01366 | 0.005672 | 0.01622 |
| 5 | 270 | 0.02947 | 0.009731 | 0.002701 |
| 6 | 9 | -0.02171 | 0.01542 | 0.2021 |
Group by blood cell trait category
load("bc_type/bc_type.rdata")
bcm$id <- gsub("ebi-a-", "", bcm$id)
stopifnot(all(ps$id.outcome %in% bcm$id))
ps <- inner_join(ps, subset(bcm, select=c(trait, id, type)), by=c("id.outcome"="id"))
ggplot(ps %>% subset(., !grepl("6:", variant) & fdr.y < 0.05), aes(x = b.x, y = b.y)) +
geom_point(aes(colour=type)) +
geom_smooth(method="lm", aes(color=type)) +
labs(x="Primary causal estimate", y="Secondary causal estimate", colour="Secondary\nFDR < 0.05") +
scale_colour_brewer(type="qual", palette=3)
ggplot(ps %>% subset(., !grepl("6:", variant)), aes(x = b.x, y = b.y)) +
geom_point(aes(colour=type)) +
geom_smooth(method="lm", aes(color=type)) +
labs(x="Primary causal estimate", y="Secondary causal estimate", colour="Secondary\nFDR < 0.05") +
scale_colour_brewer(type="qual", palette=3)
Coefficients per type
ps %>%
subset(., !grepl("6:", variant) & fdr.y < 0.05) %>%
group_by(type=type) %>%
do(assocfn(.)) %>%
pander()
| type | nassoc | b | se | p |
|---|---|---|---|---|
| Lymphoid | 960 | 0.06633 | 0.0189 | 0.0004683 |
| Myeloid | 1702 | 0.1027 | 0.01347 | 4.143e-14 |
| Platelet | 865 | 0.07294 | 0.0195 | 0.0001955 |
| Red blood cell | 1373 | 0.1953 | 0.02217 | 3.656e-18 |
ps %>%
subset(., !grepl("6:", variant)) %>%
group_by(type=type) %>%
do(assocfn(.)) %>%
pander()
| type | nassoc | b | se | p |
|---|---|---|---|---|
| Lymphoid | 6279 | 0.01929 | 0.003735 | 2.474e-07 |
| Myeloid | 10289 | 0.02607 | 0.002961 | 1.49e-18 |
| Platelet | 4181 | 0.0223 | 0.004679 | 1.947e-06 |
| Red blood cell | 7389 | 0.06347 | 0.00542 | 2.119e-31 |
Coefficients per study
ps %>%
subset(., !grepl("6:", variant)) %>%
group_by(type=trait) %>%
do(assocfn(.)) %>%
arrange(desc(b)) %>%
pander()
| type | nassoc | b | se | p |
|---|---|---|---|---|
| Hematocrit | 384 | 0.1075 | 0.0242 | 1.163e-05 |
| Hemoglobin concentration | 313 | 0.1001 | 0.02891 | 0.0006139 |
| Immature fraction of reticulocytes | 358 | 0.0973 | 0.02656 | 0.0002865 |
| Mean corpuscular hemoglobin concentration | 406 | 0.09513 | 0.0182 | 2.753e-07 |
| High light scatter reticulocyte percentage of red cells | 565 | 0.07705 | 0.0204 | 0.0001753 |
| Reticulocyte count | 504 | 0.07481 | 0.02162 | 0.0005843 |
| High light scatter reticulocyte count | 566 | 0.07246 | 0.02015 | 0.000351 |
| Reticulocyte fraction of red cells | 492 | 0.06241 | 0.02311 | 0.007165 |
| Platelet distribution width | 241 | 0.06047 | 0.02237 | 0.007356 |
| Mean corpuscular volume | 1532 | 0.05421 | 0.0127 | 2.082e-05 |
| Red blood cell count | 1017 | 0.05015 | 0.01213 | 3.835e-05 |
| Mean corpuscular hemoglobin | 1252 | 0.04777 | 0.01343 | 0.0003876 |
| Eosinophil counts | 847 | 0.04727 | 0.01032 | 5.365e-06 |
| Sum basophil neutrophil counts | 1439 | 0.04359 | 0.008549 | 3.873e-07 |
| Eosinophil percentage of white cells | 928 | 0.03747 | 0.009325 | 6.336e-05 |
| Mean platelet volume | 1191 | 0.03629 | 0.009348 | 0.0001092 |
| Eosinophil percentage of granulocytes | 952 | 0.03584 | 0.008809 | 5.117e-05 |
| White blood cell count (basophil) | 410 | 0.03391 | 0.01397 | 0.01567 |
| Neutrophil percentage of granulocytes | 904 | 0.03347 | 0.008491 | 8.712e-05 |
| Basophil percentage of white cells | 570 | 0.03256 | 0.0123 | 0.008353 |
| Platelet count | 1272 | 0.02486 | 0.008316 | 0.002851 |
| Lymphocyte percentage of white cells | 902 | 0.02416 | 0.008679 | 0.005495 |
| Neutrophil percentage of white cells | 864 | 0.02262 | 0.008719 | 0.009632 |
| Monocyte count | 847 | 0.02132 | 0.01162 | 0.06682 |
| Myeloid white cell count | 1119 | 0.02101 | 0.009583 | 0.02854 |
| Basophil percentage of granulocytes | 314 | 0.01761 | 0.01331 | 0.1867 |
| Granulocyte count | 898 | 0.01491 | 0.009825 | 0.1294 |
| Sum neutrophil eosinophil counts | 904 | 0.01447 | 0.01027 | 0.1593 |
| Neutrophil count | 848 | 0.0134 | 0.009944 | 0.1783 |
| White blood cell count | 1199 | 0.01332 | 0.01015 | 0.1895 |
| Lymphocyte counts | 759 | 0.01142 | 0.01119 | 0.308 |
| Monocyte percentage of white cells | 1057 | 0.01123 | 0.009493 | 0.2372 |
| Plateletcrit | 1477 | 0.01056 | 0.007575 | 0.1636 |
| Granulocyte percentage of myeloid white cells | 807 | 0.01023 | 0.01064 | 0.3367 |